import urlearning.Record as Record
import numpy as np

class RecordFile:
    """In-memory list of records tied to a delimited text file.

    The object behaves like a read-only sequence of its records (``len``,
    indexing, iteration).  ``read`` fills it from ``filename`` and ``write``
    saves it back to the same path.

    Attributes:
        filename: path used by both ``read`` and ``write``.
        hasHeader: when true, the first line read is stored in ``header``
            instead of ``records``.
        delimiter: separator handed to the records when parsing and writing.
        header: the header record, or ``None`` if none has been read or set.
        records: the data records, in insertion order.
    """

    def __init__(self, filename, hasHeader = True, delimiter = ","):
        self.filename = filename
        self.hasHeader = hasHeader
        self.delimiter = delimiter
        self.header = None
        self.records = []
        # Cursor used only by the legacy ``__next__`` protocol below.
        self.cur = 0

    def __len__(self):
        """Return the number of data records (the header is not counted)."""
        return len(self.records)

    def __getitem__(self, key):
        """Return the record(s) at ``key``; supports whatever a list supports."""
        return self.records[key]

    def __iter__(self):
        """Return a fresh, independent iterator over the records.

        Each call yields its own iterator, so nested or overlapping loops
        over the same RecordFile do not interfere with one another.
        """
        # Still reset the shared cursor so code that calls ``iter(rf)`` and
        # then ``next(rf)`` directly keeps working.
        self.cur = 0
        return iter(self.records)

    def __next__(self):
        """Advance the shared cursor and return the next record.

        Kept for backward compatibility; ``for`` loops use the independent
        iterator returned by ``__iter__`` instead.

        Raises:
            StopIteration: when the cursor has passed the last record.
        """
        if self.cur >= len(self):
            raise StopIteration
        r = self[self.cur]
        self.cur += 1
        return r

    def append(self, record):
        """Add ``record`` to the end of the record list."""
        self.records.append(record)

    def getVariableCount(self):
        """Return the length of the first record, or 0 if there are none."""
        if self.records:
            return len(self.records[0])

        return 0

    def read(self):
        """Parse ``filename`` line by line and append the resulting records.

        Every line is stripped and handed to ``Record.setValues`` together
        with the delimiter.  If ``hasHeader`` is set and no header is stored
        yet, the first parsed line becomes ``header`` rather than a record.
        """
        # The context manager guarantees the handle is closed, even if
        # parsing a line raises.
        with open(self.filename) as recordFile:
            for line in recordFile:
                line = line.strip()
                record = Record.Record(0)
                record.setValues(line, self.delimiter)
                if self.hasHeader and self.header is None:
                    self.header = record
                    continue
                self.records.append(record)

    def write(self):
        """Write the header (if any) and all records to ``filename``.

        The file is overwritten.  Each record is written on its own line
        using the record's own ``write(stream, delimiter)`` method.
        """
        with open(self.filename, 'w') as stream:
            # A file flagged as having a header may never have been given
            # one (e.g. records were only appended); skip it rather than fail.
            if self.hasHeader and self.header is not None:
                self.header.write(stream, self.delimiter)
                stream.write("\n")

            for record in self.records:
                record.write(stream, self.delimiter)
                stream.write("\n")

    def getValues(self):
        """Return, per variable, a dict mapping each distinct value to an index.

        Indices are assigned in order of first appearance, starting at 0.
        The list has one dict per position of the first record and is empty
        when there are no records.
        """
        values = [{} for _ in range(self.getVariableCount())]

        for record in self.records:
            for i in range(len(record)):
                mapping = values[i]
                if record[i] not in mapping:
                    mapping[record[i]] = len(mapping)

        return values

    def getCardinalities(self):
        """Return an integer numpy array with the distinct-value count of each variable."""
        # The builtin ``int`` replaces the ``np.int`` alias, which no longer
        # exists in current NumPy releases.
        return np.array([len(mapping) for mapping in self.getValues()], dtype=int)

    # convert all of the records to numeric values.  That is, each value
    # for each variable maps to a unique integer in [0, cardinality) based on
    # the mapping found in getValues.  The matrix is returned in numpy format.
    def toNumeric(self):
        """Return a (records x variables) numpy array of value indices.

        The array uses numpy's default float dtype; entry ``[i][j]`` is the
        index that ``getValues`` assigned to the value of variable ``j`` in
        record ``i``.
        """
        values = self.getValues()
        variableCount = self.getVariableCount()
        numeric = np.zeros((len(self), variableCount))

        for i, record in enumerate(self.records):
            for j in range(variableCount):
                numeric[i][j] = values[j][record[j]]
        return numeric
        

